package com.meiyuetao.myt.crawl.filter;

import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;

import lab.s2jh.crawl.filter.ParseFilterChain;

import org.apache.commons.lang3.StringUtils;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.Assert;

import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.google.common.collect.Lists;
import com.meiyuetao.myt.crawl.entity.ParseCommodity;

/**
 * Parse filter for Suning product detail pages.
 *
 * <p>The page is fetched as an HtmlUnit {@link HtmlPage}; category path, title, sale price,
 * description, window images and promotion text are extracted through the helpers inherited
 * from {@link AbstractCommodityParseFilter}, and the result is persisted as a
 * {@link ParseCommodity}.
 */
public class SuNingParseFilter extends AbstractCommodityParseFilter {

    private static final Logger logger = LoggerFactory.getLogger(SuNingParseFilter.class);

    /**
     * Upper bound on review pages to crawl. Currently unreferenced: review crawling is not
     * implemented for Suning (see the note in {@link #doFilterInternal}).
     */
    private static final int MAX_REVIEW_PAGES = 5;

    /** Domain list handed to {@code parseDescription} as its last argument. */
    private static final String[] ALERT_DOMAIN_INFOS = { "suning.com", "redbaby.com" };

    /**
     * Parses the product page at {@code url} and saves the resulting {@link ParseCommodity}.
     *
     * <p>An existing record is looked up by its {@code baseUrl}; when none exists a new one is
     * created with source type {@code "suning.com"}, the product code taken from the URL, and
     * the URL itself as uid. The record is reset before the freshly parsed values are applied.
     *
     * @param url         product page URL; must match {@code urlMatchPattern} with a non-null
     *                    first capture group (the product code)
     * @param filterChain filter chain supplied by the caller; not used by this method
     * @throws IllegalArgumentException if no product code can be extracted from {@code url}
     */
    @Override
    public void doFilterInternal(String url, ParseFilterChain filterChain) {
        logger.debug("Invoking {} ...", this.getClass());

        // Validate the URL before the page fetch so a bad URL fails without a page load.
        String sn = extractSourceCode(url);

        HtmlPage htmlPage = fetchHtmlPage(url);
        String categoryPath = parseCategoryPath(htmlPage);

        ParseCommodity parseCommodity = findOrCreateCommodity(url, sn);
        parseCommodity.reset();

        // NOTE: customer review crawling is not implemented for this site. The block that used
        // to sit here was a commented-out copy of the JD review crawler (club.jd.com URLs,
        // comment source "jd.com") and has been removed; it remains in version control.

        // Category path the product belongs to
        parseCommodity.setCategoryPath(categoryPath);
        String title = parseTitle(htmlPage, "//DIV[@class='product-main-title']/H1");
        parseCommodity.setTitle(title);

        // Sale price
        parseSalePrice(parseCommodity, htmlPage, "//SPAN[@id='netPrice']", "//SPAN[@id='netPrice']/EM");
        // Product description
        parseDescription(parseCommodity, htmlPage, "//DIV[@id='productDetail']", "//DIV[@id='productDetail']//IMG", "src2", ALERT_DOMAIN_INFOS);
        // Window (gallery) images
        parseWindowImgs(parseCommodity, htmlPage, "//DIV[@id='preView_box']//DIV[@class='thumbnai-box']/UL//IMG", "//DIV[@id='PicView']//A[@id='bigImg']//IMG", "data-lazyload");
        // Promotion slogan
        parseSalePrompt(parseCommodity, htmlPage, "//DIV[@id='product-promotions']");
        // Stock parsing is currently disabled:
        // parseSaleStock(parseCommodity, htmlPage, "//SPAN[@id='c_kucun']", "//P[@id='c_kucun']");

        logger.debug("Saving Parse Commodity: {}", parseCommodity);
        DateTime fetchTime = new DateTime();
        parseCommodity.setLastFetchTime(fetchTime.getMillis());
        parseCommodity.setLastFetchTimeLabel(fetchTime.toString("yyyy-MM-dd HH:mm:ss"));
        parseCommodityService.save(parseCommodity);
    }

    /**
     * Lightweight parsing hook; not implemented for this site.
     *
     * @param url candidate URL
     * @return always {@code null}
     */
    @Override
    public Map<String, Object> parseSimpleData(String url) {
        if (isAcceptUrl(url)) {
            // Intentionally empty: no simple-data extraction is implemented for this site yet.
        }
        return null;
    }

    /** Extracts the product code (first capture group of {@code urlMatchPattern}) from the URL. */
    private String extractSourceCode(String url) {
        String sn = null;
        Matcher matcher = this.urlMatchPattern.matcher(url);
        if (matcher.find()) {
            sn = matcher.group(1);
        }
        Assert.notNull(sn, "Unable to extract product code from url: " + url);
        return sn;
    }

    /** Joins the breadcrumb anchor texts, except the last anchor, with {@code ">"}. */
    private String parseCategoryPath(HtmlPage htmlPage) {
        List<?> cateList = htmlPage.getByXPath("//DIV[@class='path w cityId_replace']//A");
        List<String> categories = Lists.newArrayList();
        // The final anchor is skipped on purpose (bound is size - 1); presumably it is not a
        // category entry - TODO confirm against the live page markup.
        for (int i = 0; i < cateList.size() - 1; i++) {
            HtmlElement node = (HtmlElement) cateList.get(i);
            categories.add(node.getTextContent().trim());
        }
        return StringUtils.join(categories, ">");
    }

    /** Returns the stored commodity whose {@code baseUrl} equals the URL, or a newly initialised one. */
    private ParseCommodity findOrCreateCommodity(String url, String sn) {
        ParseCommodity parseCommodity = parseCommodityService.findByProperty("baseUrl", url);
        if (parseCommodity == null) {
            parseCommodity = new ParseCommodity(url);
            // Source group identifier; normally the site's main domain
            parseCommodity.setSourceType("suning.com");
            // Product code on the source site
            parseCommodity.setSourceCode(sn);
            parseCommodity.setUid(url);
        }
        return parseCommodity;
    }

}
